National Parks Movement Analysis

Author
Affiliation

Harry Seely

University of British Columbia, Department of Forest Resources Management, Integrated Remote Sensing Studio

Published

Oct 18, 2024

1 Load Packages

library(here)
Warning: package 'here' was built under R version 4.4.1
here() starts at D:/Sync/National_Park_Movement_Paper
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
library(mapview)
library(ggplot2)
library(osmdata)
Warning: package 'osmdata' was built under R version 4.4.1
Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(DT)
library(parallel)
library(RColorBrewer)
library(nngeo)
Warning: package 'nngeo' was built under R version 4.4.1
library(cowplot)

Attaching package: 'cowplot'

The following object is masked from 'package:lubridate':

    stamp
library(readxl)
library(units)
udunits database from C:/Users/hseely/AppData/Local/R/win-library/4.4/units/share/udunits/udunits2.xml
library(webshot2)
Warning: package 'webshot2' was built under R version 4.4.1
library(patchwork)

Attaching package: 'patchwork'

The following object is masked from 'package:cowplot':

    align_plots
#Read custom functions
utils_fpath <-here("analysis/utils.R") 
source(utils_fpath)

2 Read data

#Read flickr data
df <- read_csv(here("data/clustered_data_with_clusters.csv"))
Rows: 15096 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (7): user_id, title, description, tags, url, park, Labels
dbl  (8): post_id, lat, lon, accuracy, date_upl, year, Image, cluster
dttm (1): date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Read cluster tags and add to df
df <- read_xlsx(here("data/cluster tag.xlsx")) %>%
  select(Cluster, `Proposed Tag`) %>%
  rename(cluster = Cluster, category = `Proposed Tag`) %>%
  right_join(df, by = "cluster")
  

#Read georeferenced trails data
georef_trails <- st_read(here("data/national_park_trails_georeferenced/Trails_after_georeferencing_ExportFeatures.shp"))
Reading layer `Trails_after_georeferencing_ExportFeatures' from data source 
  `D:\Sync\National_Park_Movement_Paper\data\national_park_trails_georeferenced\Trails_after_georeferencing_ExportFeatures.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 104 features and 1 field
Geometry type: LINESTRING
Dimension:     XYZ
Bounding box:  xmin: -13034480 ymin: 6569358 xmax: -12821140 ymax: 6843529
z_range:       zmin: 0 zmax: 0
Projected CRS: WGS 84 / Pseudo-Mercator
#Convert to spatial data (assuming WSGS 84 CRS) and reproject to UTM Zone 11N
df_sf <- df %>%
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(4326)) %>%
  st_transform(32611)
  

#Get bounding box for data and convert to polygon
obs_bbox <- st_bbox(df_sf)
obs_bbox_sf <- obs_bbox %>%
  st_as_sfc() %>%
  st_as_sf()

#Read national park boundaries for BC and AB
bc_parks <- st_read(here("data/national_park_boundaries/british_columbia/CLAB_BC_2023-09-08.shp"))
Reading layer `CLAB_BC_2023-09-08' from data source 
  `D:\Sync\National_Park_Movement_Paper\data\national_park_boundaries\british_columbia\CLAB_BC_2023-09-08.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 7 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -132.1088 ymin: 48.53914 xmax: -115.7991 ymax: 52.8101
Geodetic CRS:  NAD83(CSRS)
ab_parks <- st_read(here("data/national_park_boundaries/alberta/CLAB_AB_2023-09-08.shp"))
Reading layer `CLAB_AB_2023-09-08' from data source 
  `D:\Sync\National_Park_Movement_Paper\data\national_park_boundaries\alberta\CLAB_AB_2023-09-08.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 5 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -119.5439 ymin: 48.9975 xmax: -111.017 ymax: 60.69088
Geodetic CRS:  NAD83(CSRS)
#Combine park boundaries and reproject to correct CRS
parks <- bind_rows(bc_parks, ab_parks) %>%
  st_transform(st_crs(df_sf)) %>%
  mutate(NAME_E = str_to_title(NAME_E)) %>%
  mutate(NAME_E = str_replace_all(NAME_E, " National Park Of Canada", "")) %>%
  filter(!NAME_E %in% c("Mount Revelstoke", "Glacier"))

#Get parks that intersect with bounding box
parks_int <- st_intersection(parks, obs_bbox_sf)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
#Subset parks that intersect with bounding box
parks <- parks %>%
  filter(NAME_E %in% parks_int$NAME_E)

#Map parks
parks %>%
  ggplot(aes(fill = NAME_E)) + 
    geom_sf() +
    theme_bw() + 
    guides(fill = guide_legend(title = "Name"))

3 Spatially group observations by cluster

Assign time period for years relative to covid

df_sf <- df_sf %>%
  #Classify years by pre, peri, post
  mutate(period = case_when(year == 2019 ~ "pre",
                          year > 2019 &  year < 2023 ~ "peri",
                          year == 2023 ~ "post")) %>%
  mutate(period = factor(period, levels = c("pre", "peri", "post")))
print(table(df_sf$cluster))

   0    1    2    3    4    5    6    7    8    9 
1768 1660 2835 1075 2376 1269 2346  903  497  367 
df_clus_grp <- df_sf %>%
  group_by(cluster) %>%
  summarise() 

plot(df_clus_grp['cluster'])

#Summarize clusters spatially using the average position
df_clus_centroid <- df_sf %>%
  #Convert coords to columns
  mutate(lon = st_coordinates(.)[, 1],
         lat = st_coordinates(.)[, 2]) %>%
  st_drop_geometry() %>%
  group_by(period, cluster) %>%
  #Get cluster centroids (mean lat/long)
  summarise(lon = mean(lon),
            lat = mean(lat)) %>%
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(df_sf)) %>%
  mutate(cluster = as.factor(cluster))
`summarise()` has grouped output by 'period'. You can override using the
`.groups` argument.
df_clus_centroid %>%
  ggplot() +
  geom_sf(aes(color = cluster)) +
  facet_wrap(~period) +
  theme_bw()

4 Quantify direction of shifts in cluster centroids

Reproduce the figure generated in Liang et al. (2022) that shows the direction of shifts in cluster centroids

Reference: Liang, Yu, et al. “What is the role of disturbance in catalyzing spatial shifts in forest composition and tree species biomass under climate change?.” Global Change Biology 29.4 (2023): 1160-1177.

#Function to derive the distance between two points and the angle between them
compare_dist_n_angle <- function(pt1, pt2){
  
  dist <- st_distance(pt1, pt2) %>% 
    as.numeric() -> dist
  
  #Divide by 1000 to convert to km
  dist <- dist/1000
  
  azimuth <- st_azimuth(pt1, pt2) %>% 
    as.numeric() -> angle
  
  # Convert to cartesian coordinates
  x <- dist * cos(azimuth)
  y <- dist * sin(azimuth)
  
  #Invert the x coordinate since the UTM coordinate system increases eastward
  x <- -x
  
  #Combine into df
  out_df <- data.frame(dist = dist, angle = angle, x = x, y = y)
  
  #Assign angle a direction
  out_df <- out_df %>%
  mutate(direction = case_when(angle >= 0 & angle < 90 ~ "NE",
                               angle >= 90 & angle < 180 ~ "SE",
                               angle >= 180 & angle < 270 ~ "SW",
                               angle >= 270 ~ "NW")) %>%
    relocate(direction, .after = dist)
  
  return(out_df)
  
}

#Test for cluster 1 in pre and post
test_pt1 <-df_clus_centroid %>% filter(period == "pre" & cluster == 1)
test_pt2 <- df_clus_centroid %>% filter(period == "post" & cluster == 1)

#Test shift direction and magnitude for two points pre and post

test_out <- compare_dist_n_angle(test_pt1, test_pt2)

shift_plt <- test_out %>%
  ggplot(aes(x = x, y = y)) +
    geom_point(color = "red") +
    xlim(-max(test_out$dist), max(test_out$dist)) +
    ylim(-max(test_out$dist), max(test_out$dist)) +
    #Add horizontal and vertical lines at x=0 and y=0
    geom_hline(yintercept = 0, color = "black") +
    geom_vline(xintercept = 0, color = "black") +
    coord_fixed() +
    theme_bw() +
    labs(x = "East-West Centroid Shift (km)", 
         y = "North-South Centroid Shift (km)") + 
    #Remove all gridlines
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour = "black"),
          #Add black oultine to entire plot
          panel.border = element_rect(colour = "black", fill = NA, size = 1))
Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
cluster_loc_plt <- df_clus_centroid %>%
  filter(period %in% c("pre", "post") & cluster == 1) %>%
  ggplot() +
  geom_sf(aes(color = period)) +
  theme_bw()

plot_grid(cluster_loc_plt, shift_plt, nrow = 1, 
          labels = c("Original Locations", "Shift"))

Apply shift analysis to all clusters for pre-per; peri-post; and pre-post

#List of time frames to compare
timeframes_ls <- list(c("pre", "peri"), c("peri", "post"), c("pre", "post"))

#Make list to store all the shift dfs
shift_df_ls <- list()

for(t_frame in timeframes_ls){
  
  t1 <- t_frame[1]
  t2 <- t_frame[2]
  
  for(c in unique(df_clus_centroid $cluster)){
      
      pt1 <- df_clus_centroid  %>% filter(period == t1 & cluster == c)
      pt2 <- df_clus_centroid  %>% filter(period == t2 & cluster == c)
      
      time_nm <- paste0(t1, "_to_", t2)
      
      ls_item_nm <- paste0("cluster_", c, "_", time_nm) 
      
      shift_df_ls[[ls_item_nm]] <- compare_dist_n_angle(pt1, pt2) %>%
        mutate(cluster = c,
               timeframe = time_nm) %>%
        relocate(cluster, timeframe, .before = dist)
               
  }
}
    

cluster_shift_df <- bind_rows(shift_df_ls) %>%
  #Fix time period names
  mutate(timeframe = case_when(timeframe == "pre_to_peri" ~ "Pre - Peri",
                               timeframe == "peri_to_post" ~ "Peri - Post",
                               timeframe == "pre_to_post" ~ "Pre - Post")) %>%
  mutate(timeframe = factor(timeframe, 
  levels = c("Pre - Peri", "Peri - Post", "Pre - Post")))

  
cluster_shift_df %>%
  ggplot(aes(x = x, y = y, color = cluster)) +
    geom_point() +
    xlim(-max(cluster_shift_df$dist), max(cluster_shift_df$dist)) +
    ylim(-max(cluster_shift_df$dist), max(cluster_shift_df$dist)) +
    #Add horizontal and vertical lines at x=0 and y=0
    geom_hline(yintercept = 0, color = "black") +
    geom_vline(xintercept = 0, color = "black") +
    coord_fixed() +
    facet_wrap(~timeframe) +
    theme_bw() +
    labs(x = "East-West Centroid Shift (km)", 
         y = "North-South Centroid Shift (km)") + 
    #Remove all gridlines
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour = "black"),
          #Add black oultine to entire plot
          panel.border = element_rect(colour = "black", fill = NA, size = 1))

Summarize shifts in tabular format

cluster_shift_df %>%
  mutate_if(is.numeric, round, 2) %>%
  DT::datatable()

The clusters with the largest shifts in centroid position were 7 and 9. We can map these in more detail

centroids_7_n_9 <- df_clus_centroid %>%
  filter(cluster %in% c(7, 9)) %>%
  mutate(centroid = TRUE)

df_clusters_7_9_w_centroids <- df_sf %>%
    mutate(cluster = as.factor(cluster)) %>%
    filter(cluster %in% c(7, 9)) %>%
    bind_rows(centroids_7_n_9) %>%
    mutate(centroid = if_else(is.na(centroid), FALSE, centroid))


df_clusters_7_9_w_centroids %>%
  ggplot() +
  geom_sf(aes(color = cluster, shape=centroid, size = centroid)) +
  facet_wrap(~period) +
  scale_colour_brewer(palette = "Set1") +
  scale_shape_manual(values = c(19, 3)) +
  scale_size_manual(values = c(1, 3)) +
  theme_bw()

5 Load OSM line data

Check out OSM highway types: https://wiki.openstreetmap.org/wiki/Key:highway

#Ensure this cell does not generate any output text when knitted


     bridleway         busway       corridor       cycleway        disused 
            25              6             12            269              3 
        escape        footway  living_street       motorway  motorway_link 
             2           3047              3            156            123 
          path     pedestrian        primary   primary_link    residential 
          5064             11            393              8           2636 
     rest_area           road      secondary secondary_link        service 
            23             13            433             19           4528 
         steps       tertiary  tertiary_link          track          trunk 
           224           1946             12           5066            702 
    trunk_link   unclassified    via_ferrata 
           111           7181              1 
Deleting layer `osm_lines' using driver `GPKG'
Writing layer `osm_lines' to data source 
  `D:/Sync/National_Park_Movement_Paper/data/osm_data/osm_lines.gpkg' using driver `GPKG'
Writing 32017 features with 10 fields and geometry type Unknown (any).

   via_ferrata         escape        disused  living_street         busway 
             1              2              3              3              6 
  primary_link     pedestrian       corridor  tertiary_link           road 
             8             11             12             12             13 
secondary_link      rest_area      bridleway     trunk_link  motorway_link 
            19             23             25            111            123 
      motorway          steps       cycleway        primary      secondary 
           156            224            269            393            433 
         trunk       tertiary    residential        footway        service 
           702           1946           2636           3047           4528 
          path          track   unclassified 
          5064           5066           7181 

6 Snap Flickr points to nearest line

Create a test area for demonstrating the snapping method

test_dist <- 50000

test_bbox <- obs_bbox

test_bbox['xmin'] <- obs_bbox['xmax'] - test_dist
test_bbox['ymin'] <- obs_bbox['ymin']
test_bbox['xmax'] <- obs_bbox['xmax']
test_bbox['ymax'] <- obs_bbox['ymin'] + test_dist

test_bbox_sf <- test_bbox %>%
  st_bbox() %>%
  st_as_sfc() %>%
  st_as_sf()

#View test area in relation to total area
ggplot() + 
  geom_sf(data = obs_bbox_sf, fill = NA, col = "black") +
  geom_sf(data = test_bbox_sf, fill = NA, col = "red") +
  theme_bw()

print("test area is (size in meters squared")
[1] "test area is (size in meters squared"
print(st_area(test_bbox_sf))
2.5e+09 [m^2]
#Get lines and points within test area

test_lines <- osm_lines %>%
  st_intersection(test_bbox_sf)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
test_points <- df_sf %>%
  st_intersection(test_bbox_sf)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
ggplot() + 
  geom_sf(data = test_bbox_sf, fill = NA, col = "black") +
  geom_sf(data = test_lines, col = "grey") +
  geom_sf(data = test_points, col = "red", size = 0.9) + 
  theme_bw()

snap_points function based on this post: https://stackoverflow.com/questions/51292952/snap-a-point-to-the-closest-point-on-a-line-segment-using-sf

# remove any object called "c", this breaks the concat fn
rm(c)
test_points_snap = snap_points(test_points, test_lines, 1000)

#Filter the points that did not snap
test_points_snap_fail <- test_points_snap %>%
  filter(!snapped)

test_points_snap_success <- test_points_snap %>%
  filter(snapped)

mapview(test_points, col.regions = "blue") + 
  mapview(test_points_snap_fail, col.regions = "red") + 
  mapview(test_points_snap_success, col.regions = "green") + 
  mapview(test_lines, color = "grey")
print("The mean snap distance is (in meters):")
[1] "The mean snap distance is (in meters):"
mean(test_points_snap$snapped_distance)
[1] 26.80383

To speed up processing time, we can grid the study area and snap points within each grid cell

#Create a grid of study area
grid_size <- 50000 #50km grid
grid <- st_make_grid(obs_bbox_sf, cellsize = grid_size)

#Assign each grid cell an id
grid <- grid %>%
  st_as_sf() %>%
  mutate(grid_id = row_number())

#Plot grid
ggplot() + 
  geom_sf(data = obs_bbox_sf, fill = NA, col = "black") +
  geom_sf(data = grid, fill = NA, col = "red") +
  theme_bw()

Write function that can process snapping over a grid

snap_points_gridcell <- function(gridcell, points, lines, max_dist, save_dir, verbose=F) {
  
  source(utils_fpath)
  
  #Fault tolerance
  tryCatch({
  
    #Clip the points
    points_clip <- st_intersection(points, gridcell)
    
    n_points_in_cell <- nrow(points_clip)
    
    #Only proceed if there are points in the grid cell
    if(n_points_in_cell > 0){
    
      lines_clip <- st_intersection(lines, gridcell)
      
      #Check number of points in grid cell
      
      if(verbose){
        print(paste("There are", 
                    n_points_in_cell, 
                    "points in grid cell", 
                    gridcell$grid_id))}
      
      #Apply snapping
      points_snapped = snap_points(points_clip, lines_clip, max_dist)
      
      #Export snapped points
      fname <- paste0("snapped_points_", gridcell$grid_id, ".gpkg")
      st_write(points_snapped, file.path(save_dir, fname), append = F)
      
    } else {
      if(verbose){
        print(paste("No points in grid cell", gridcell$grid_id))}
    }
    
  }, error = function(e){
    
    #Print error and traceback
    print(paste("Error with grid cell id:", gridcell$grid_id))
    print(e)})
  
}

#Set vars for snapping
snap_points_outdir <- here("data/snapped_points_in_grid")
max_snap_dist <- 1000 #in meters

#Get test grid cell
test_cell_id <- 12
test_cell <- grid[grid$grid_id == test_cell_id,]

#Test function on one grid cell
snap_points_gridcell(gridcell=test_cell,
                 points=df_sf, 
                 lines=osm_lines, 
                 max_dist=max_snap_dist, 
                 save_dir=snap_points_outdir,
                 verbose=T
                 )
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
[1] "There are 32 points in grid cell 12"
Deleting layer `snapped_points_12' using driver `GPKG'
Writing layer `snapped_points_12' to data source 
  `D:/Sync/National_Park_Movement_Paper/data/snapped_points_in_grid/snapped_points_12.gpkg' using driver `GPKG'
Writing 32 features with 19 fields and geometry type Point.
#Load and view snapped points
snapped_points_test_gridcell <- st_read(file.path(snap_points_outdir, 
                                                  paste0("snapped_points_", 
                                                  test_cell_id, 
                                                  ".gpkg")))
Reading layer `snapped_points_12' from data source 
  `D:\Sync\National_Park_Movement_Paper\data\snapped_points_in_grid\snapped_points_12.gpkg' 
  using driver `GPKG'
Simple feature collection with 32 features and 19 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 607178.2 ymin: 5666023 xmax: 612955.7 ymax: 5682032
Projected CRS: WGS 84 / UTM zone 11N
snapped_points_test_gridcell_success <- snapped_points_test_gridcell %>%
  filter(snapped)

ggplot() + 
  geom_sf(data = test_cell, fill = NA, col = "black") +
  geom_sf(data = df_sf, col = "red") +
  geom_sf(data = snapped_points_test_gridcell_success, col = "green") +
  geom_sf(data = osm_lines) + 
  #Limit plot to snapped points bbox
  coord_sf(xlim = st_bbox(snapped_points_test_gridcell_success)[c("xmin", "xmax")],
           ylim = st_bbox(snapped_points_test_gridcell_success)[c("ymin", "ymax")]) +
  theme_bw() + 
  #Add legend to denote sf feature colours
  scale_color_manual(values = c("blue", "green", "black")) + 
  labs(color = "Feature type")

Process snapping across grid (sequential processing for now, might upgrade to parallel later…)

Currently takes about 9 minutes to run sequentially on my machine.

RUN_POINT_SNAPPING <- FALSE
RM_SNAP_PTS_FILES <- FALSE

if(RM_SNAP_PTS_FILES){
  files_rm = do.call(file.remove, list(list.files(snap_points_outdir, full.names = TRUE)))
}

if(RUN_POINT_SNAPPING){
  
  start_time <- Sys.time()
  
  for(cell_id in unique(grid$grid_id)){
    
    print(paste("Processing grid cell", cell_id, "of", nrow(grid), "cells."))
    
    target_cell <- grid[grid$grid_id == cell_id,]
    
    snap_points_gridcell(gridcell=target_cell,
                         points=df_sf, 
                         lines=osm_lines, 
                         max_dist=max_snap_dist, 
                         save_dir=snap_points_outdir,
                         verbose=T)
    
  }
  
  # Calculate elapsed time in seconds
  end_time <- Sys.time()
  t_elapsed <- as.numeric(difftime(end_time, start_time, units = "secs"))
  
  # Convert elapsed time tominutes and seconds
  elapsed_minutes <- floor((t_elapsed %% 3600) / 60)
  elapsed_seconds <- t_elapsed %% 60
  
  # Print the elapsed time in hours, minutes, and seconds
  cat("Point snapping processing time:", 
      elapsed_minutes, "minutes,", 
      elapsed_seconds, "seconds.")
}

7 Examine snapped points

snap_pts_ls <- lapply(list.files(snap_points_outdir, full.names = TRUE), st_read, quiet = T)

snap_pts <- bind_rows(snap_pts_ls)

print("Percentage of points that successfully snapped:")
[1] "Percentage of points that successfully snapped:"
round(table(snap_pts$snapped)/nrow(snap_pts)*100, 2)

FALSE  TRUE 
 0.34 99.66 
print("Mean snap distance (in meters):")
[1] "Mean snap distance (in meters):"
mean(snap_pts$snapped_distance)
[1] 27.09744
hist(snap_pts$snapped_distance, main = "Histogram of snap distances", xlab = "Snap Distance (m)")

#Remove points that did not snap (too far from roads/trails)
snap_pts <- snap_pts %>%
  filter(snapped)

#Export all snapped points
st_write(snap_pts, file.path(here("data"), "snapped_points_all.gpkg"), append = F)
Deleting layer `snapped_points_all' using driver `GPKG'
Writing layer `snapped_points_all' to data source 
  `D:/Sync/National_Park_Movement_Paper/data/snapped_points_all.gpkg' using driver `GPKG'
Writing 15045 features with 20 fields and geometry type Point.

Summarize individual user observations

#First, check to see if users have multiple years of observations
users_obs_yrs<- df_sf %>%
  st_drop_geometry() %>%
  group_by(user_id) %>%
  summarise(n_years = length(unique(year))) %>%
  mutate(multi_year = ifelse(n_years > 1, TRUE, FALSE))

#Check
print("How many users have visited the park over multiple years?")
[1] "How many users have visited the park over multiple years?"
print(table(users_obs_yrs$multi_year))

FALSE  TRUE 
  515    84 
#If user is multi-year, split into multiple users labeled as user_id_[year]
multi_yr_user_ids <-  users_obs_yrs %>%
  filter(multi_year) %>%
  pull(user_id)

#user_id_yr => user ID where unique users are split into multiple users if multi-year visitor
df_sf <- df_sf %>%
  mutate(user_id_yr = if_else(user_id %in% multi_yr_user_ids, 
                             paste0(user_id, "_", year), user_id))

print("Number of unique users")
[1] "Number of unique users"
length(unique(df_sf$user_id))
[1] 599
print("Number of unique users after splitting multi-year users")
[1] "Number of unique users after splitting multi-year users"
length(unique(df_sf$user_id_yr))
[1] 714
#Group users based on user_id_yr
grouped_sf <- df_sf %>%
  group_by(user_id_yr) %>%
  summarise(n_obs = n()) %>%
  #Remove obs where user only has 1 observation
  filter(n_obs > 1) %>%
  st_cast("MULTIPOINT")

DT::datatable(grouped_sf)
hist(grouped_sf$n_obs)

print(median(grouped_sf$n_obs))
[1] 7

8 Visualize density of obs on given sections of the network

Classify OSM lines as either roads or trails

trail_nms <- c("footway", "path", "track", "bridleway", "steps", "cycleway", 
               "via_ferrata")

#Classify OSM lines into trails and roads
osm_lines <- osm_lines %>%
  mutate(line_class = if_else(highway %in% trail_nms, "trail", "road"))

#Assign line names to those that do not have them
osm_lines <-  osm_lines %>%
  mutate(name = ifelse(is.na(name), paste0(highway, "_", osm_id), name))

Get number of flickr images and distinct users each line segment. Use the snapped points to do this with a very small buffer (1m)

#Assign unique id to each line segment
osm_lines <- osm_lines %>%
  mutate(line_id = row_number())

osm_lines_buf <- osm_lines %>%
  st_buffer(1)

#Intersect obs from each year with line
yr_pts_near_line_ls <- list()
yr__pts_near_line_clust_ls <- list()

for(yr in unique(snap_pts$year)){
  
  print(paste("Processing year", yr))
  
  snap_pts_yr <- snap_pts %>%
    filter(year == yr)
  
  pts_near_line <- st_intersection(osm_lines_buf, snap_pts_yr) %>%
        st_drop_geometry()
  
  #Summarize for line segments
  pts_near_line_summary <- pts_near_line %>%
    group_by(line_id) %>%
    summarise(n_obs = n(),
              n_users = n_distinct(user_id_yr)
              ) %>%
    mutate(year = yr)
  

  yr_pts_near_line_ls[[as.character(yr)]] <- pts_near_line_summary
  
  #Summarize for line segments AND clusters
  pts_near_line_summary <- pts_near_line %>%
    group_by(line_id, cluster) %>%
    summarise(n_obs = n(),
              n_users = n_distinct(user_id_yr)
              )%>%
    mutate(year = yr)
  
  yr__pts_near_line_clust_ls[[as.character(yr)]] <- pts_near_line_summary
  
}
[1] "Processing year 2019"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2020"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2021"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2022"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2023"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
#Combine df list into single df
pts_near_line_summary <- bind_rows(yr_pts_near_line_ls)
pts_near_line_summary_clust <- bind_rows(yr__pts_near_line_clust_ls)



DT::datatable(pts_near_line_summary)
#Join to lines
osm_lines_d <- left_join(osm_lines, pts_near_line_summary, by = "line_id") %>%
  mutate(n_users = ifelse(is.na(n_users), 0, n_users),
         n_obs = ifelse(is.na(n_obs), 0, n_obs))
         

osm_lines_d <- osm_lines_d %>%
  mutate(traffic_class = case_when(n_users == 0 ~ "none",
                             n_users == 1 ~ "1",
                             n_users == 2 ~ "2",
                             n_users > 2 ~ ">2")) %>%
  mutate(traffic_class = factor(traffic_class, levels = c("none", 
                                                          "1", 
                                                          "2", 
                                                          ">2")))


osm_lines_d %>%
  filter(n_users > 0) %>%
  ggplot() + 
    geom_sf(aes(color = traffic_class), lwd = 1.2) + 
    facet_wrap(~year) +
    scale_color_manual(values = c("yellow", "orange", "red")) +
    theme_bw() +
  #Set background to dark grey
  theme(panel.background = element_rect(fill = "grey20"),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        #Add black oultine to entire plot
        panel.border = element_rect(colour = "black", fill = NA, size = 1)) 

Analyze traffic on trails vs. roads

#Summarize traffic on trails vs. roads over years
osm_lines_d %>%
  st_drop_geometry() %>%
  group_by(year, line_class) %>%
  summarise(n_users = sum(n_users)) %>%
  ggplot() + 
    geom_point(aes(x = year, y = n_users, color = line_class)) +
    geom_line(aes(x = year, y = n_users, color = line_class)) + 
  theme_bw() + 
  labs(
       x = "Year",
       y = "Number of Users",
       color = "Route Class")
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_line()`).

View top roads and trails for each year

#Get the total number of road and trail obs for each year
yr_route_sum <- osm_lines_d %>%
  st_drop_geometry() %>%
  group_by(year, line_class) %>%
  summarise(total_yr_users = sum(n_users))
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
#View the top three roads and trails that had the most traffic across years
top_routes_df <- osm_lines_d %>%
  st_drop_geometry() %>%
  filter(!is.na(name)) %>%
  group_by(year, line_class, name) %>%
  summarise(n_users = sum(n_users)) %>%
  left_join(yr_route_sum, by = c("year", "line_class")) %>%
  mutate(perc_users = round(n_users/total_yr_users*100, 1)) %>%
  arrange(year, line_class, desc(perc_users)) %>%
  slice_max(n_users, n = 3)
`summarise()` has grouped output by 'year', 'line_class'. You can override
using the `.groups` argument.
top_routes_df %>%
  filter(line_class == "road") %>%
  DT::datatable()
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

View for trails

top_routes_df %>%
  filter(line_class == "trail") %>%
  DT::datatable()
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

9 View change in most frequented road/trail over time

N_TOP_ROUTES <- 3

#Get top routes that have observations for each year
top_routes_filt <- osm_lines_d %>%
  st_drop_geometry() %>%
  filter(!is.na(name)) %>%
  group_by(line_class, name, year) %>%
  summarise(n_users = sum(n_users)) %>%
  filter(!is.na(year)) %>%
  pivot_wider(names_from = year, values_from = n_users, values_fill = 0) %>%
  filter(`2019` > 0 & 
          `2020` > 0 & 
          `2021` > 0 & 
          `2022` > 0 &
          `2023` > 0) %>%
  pivot_longer(cols = c(`2019`, `2020`, `2021`, `2022`, `2023`), 
               names_to = "year", 
               values_to = "n_users") 
`summarise()` has grouped output by 'line_class', 'name'. You can override
using the `.groups` argument.
top_n_road_names <- top_routes_filt %>%
  filter(line_class == "road") %>%
  group_by(line_class, name) %>%
  summarise(n_users = sum(n_users)) %>%
  slice_max(n_users, n = N_TOP_ROUTES) %>%
  pull(name)
`summarise()` has grouped output by 'line_class'. You can override using the
`.groups` argument.
top_n_trail_names <- top_routes_filt %>%
  filter(line_class == "trail") %>%
  group_by(line_class, name) %>%
  summarise(n_users = sum(n_users)) %>%
  slice_max(n_users, n = N_TOP_ROUTES, with_ties=F) %>%
  pull(name)
`summarise()` has grouped output by 'line_class'. You can override using the
`.groups` argument.
#Set theme
top_routes_theme <- theme_bw() + 
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.direction = "vertical",
    plot.title = element_text(hjust = 0.5, face = "bold")
    )

#Plot roads and trails separately and join
top_roads_plt <- top_routes_filt %>%
  filter(line_class == "road") %>%
  filter(name %in% top_n_road_names) %>%
  ggplot() + 
      geom_line(aes(x = year, y = n_users, color = name, group = name)) + 
      geom_point(aes(x = year, y = n_users, color = name)) +
      theme_bw() + 
      labs(
        title = "Roads",
         x = "Year",
         y = "Number of Users",
         color = "Road Name") + 
  top_routes_theme

top_trails_plt <- top_routes_filt %>%
  filter(line_class == "trail") %>%
  filter(name %in% top_n_trail_names) %>%
  ggplot() + 
      geom_line(aes(x = year, y = n_users, color = name, group = name)) + 
      geom_point(aes(x = year, y = n_users, color = name)) +
      theme_bw() + 
      labs(
         title = "Trails",
         x = "Year",
         y = "Number of Users",
         color = "Trail Name") + 
  top_routes_theme

top_roads_plt + top_trails_plt

10 Make an interactive map of road frequency

Get names of osm lines that do not have any observations

osm_lines_d %>%
  filter(traffic_class == "none") %>%
  pull(highway) %>%
  table() 
.
     bridleway         busway       corridor       cycleway        disused 
            24              6             11            260              3 
        escape        footway  living_street       motorway  motorway_link 
             2           2683              2            143            115 
          path     pedestrian        primary   primary_link    residential 
          4658             11            306              7           2592 
     rest_area           road      secondary secondary_link        service 
            10             13            372             19           4249 
         steps       tertiary  tertiary_link          track          trunk 
           199           1868             11           5022            627 
    trunk_link   unclassified    via_ferrata 
           107           7126              1 

Clip OSM lines to parks

osm_lines_d_clip <- st_intersection(osm_lines_d, parks)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries

Get differences in road/trail use density pre-peri; prei-post; pre-post

#Summarize differences
osm_lines_d_by_yr <- osm_lines_d_clip %>%
  st_drop_geometry() %>%
  filter(!is.na(year)) %>%
  mutate(period = case_when(year == 2019 ~ "pre",
                          year > 2019 &  year < 2023 ~ "peri",
                          year == 2023 ~ "post")) %>%
  group_by(osm_id, period) %>%
  summarize(n_obs = sum(n_obs)) %>%
  select(osm_id, period, n_obs) %>%
  pivot_wider(names_from = period, values_from = n_obs, values_fill = 0) %>%
  mutate(pre_peri_dif = peri - pre,
         peri_post_dif = post - peri,
         pre_post_dif = post - pre) %>%
  select(osm_id, pre_peri_dif, peri_post_dif, pre_post_dif)
`summarise()` has grouped output by 'osm_id'. You can override using the
`.groups` argument.
#Add geometry
osm_lines_d_by_yr <- osm_lines_d_clip %>%
  select(osm_id, geometry) %>%
  left_join(osm_lines_d_by_yr, by = "osm_id") %>%
  mutate_if(is.numeric, replace_na, 0) %>%
  pivot_longer(cols = c(pre_peri_dif, peri_post_dif, pre_post_dif), 
               names_to = "time_period", 
               values_to = "diff_obs") %>%
  #Reclassify into difference bins
  mutate(diff_class = case_when(diff_obs < 0 ~ "Decrease",
                              diff_obs == 0 ~ "No Change",
                              diff_obs > 0 ~ "Increase")) %>%
  #Rename time period values
  mutate(time_period = case_when(time_period == "pre_peri_dif" ~ "Pre - Peri",
                                 time_period == "peri_post_dif" ~ "Peri - Post",
                                 time_period == "pre_post_dif" ~ "Pre - Post")) %>%
  mutate(diff_class = factor(diff_class, 
                             levels = c("Decrease", "No Change", "Increase")),
         time_period = factor(time_period, 
                              levels = c("Pre - Peri", "Peri - Post", "Pre - Post")))

lines_no_changes <- osm_lines_d_by_yr %>%
  filter(diff_class == "No Change")

lines_with_changes <- osm_lines_d_by_yr %>%
  filter(diff_class != "No Change")

dif_pal <- c("red", "blue")

#Visualize differences in road/trail use density over time
traffic_over_time_dif_plt <- ggplot() + 
      geom_sf(data = parks, fill = "white", color = "black") + 
      geom_sf(data = lines_no_changes, color = "grey", lwd = 0.1) + 
      geom_sf(data = lines_with_changes, aes(color = diff_class), fill = NA, lwd = 1) +
      theme_bw() + 
      scale_linetype_manual(values = c("solid", "longdash")) + 
      scale_color_manual(name = "Change", values = dif_pal) + 
      facet_wrap(~time_period) +
      theme(panel.grid.major = element_blank(), 
            strip.background = element_rect(color=NA, fill=NA, size=1.5, linetype="solid"),
            strip.text = element_text(face = "bold"),
            panel.grid.minor = element_blank(),
            axis.line = element_line(colour = "black"),
            panel.border = element_rect(colour = "black", fill = NA, size = 1),
            legend.position = "bottom",
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            axis.title = element_blank(),
            )

traffic_over_time_dif_plt

Export change figure

ggsave(here("figures/traffic_change_over_time.png"),
       traffic_over_time_dif_plt,
       height = 12,
       width = 15,
       dpi = 600
       )

Map of road/trail use density

#Re-assign traffic class var to new vals
osm_lines_d_clip <- osm_lines_d_clip %>%
  mutate(traffic_class2 = if_else(traffic_class == '1', "Low", 
                          if_else(traffic_class == '2', "Medium", 
                          if_else(traffic_class == '>2', "High", 
                                  "None")))) %>%
  mutate(traffic_class2 = factor(traffic_class2, 
                                 levels = c("None", "Low", "Medium", "High")))



road_freq_pal <- c('yellow', '#fd8d3c', "red")

lines_w_obs <- osm_lines_d_clip %>%
  filter(n_obs > 0)

lines_no_obs <- osm_lines_d_clip %>%
  filter(n_obs == 0) %>%
  select(-year)

traffic_plt <- ggplot() + 
      geom_sf(data = parks, fill = "#355E3B", color = "black", alpha = 0.8) + 
      geom_sf(data = lines_no_obs, color = "grey", lwd = 0.1) + 
      geom_sf(data = lines_w_obs, aes(color = traffic_class2), fill = NA, lwd = 1) +
      theme_bw() + 
      scale_linetype_manual(values = c("solid", "longdash")) + 
      scale_color_manual(name = "Traffic Class", values = road_freq_pal) + 
      facet_wrap(~year) +
      theme(panel.grid.major = element_blank(), 
            strip.background = element_rect(color=NA, fill=NA, size=1.5, linetype="solid"),
            strip.text = element_text(face = "bold"),
            panel.grid.minor = element_blank(),
            axis.line = element_line(colour = "black"),
            panel.border = element_rect(colour = "black", fill = NA, size = 1),
            legend.position = "bottom",
            axis.ticks = element_blank(),
            axis.text = element_blank(),
            axis.title = element_blank(),
            )

traffic_plt

Export traffic plt

ggsave(here("figures/annual_park_road_trail_traffic.png"),
       traffic_plt,
       height = 12,
       width = 15,
       dpi = 600
       )

Interactive map of road/trail use density

#Generate map
interactive_traffic_map <- mapview(lines_w_obs, 
                                   zcol = "traffic_class", 
                                   col.regions = road_freq_pal)

#Export map to html
mapshot2(interactive_traffic_map,
         url = here("figures/traffic_interactive_map/traffic_interactive_map.html"))


#View map
interactive_traffic_map

11 Check out road/trail use density in relation to clusters

#Join to lines
osm_lines_d_clst <- left_join(osm_lines, pts_near_line_summary_clust, by = "line_id") %>%
  mutate(n_users = ifelse(is.na(n_users), 0, n_users),
         n_obs = ifelse(is.na(n_obs), 0, n_obs))
#View how road and trail use has varied by cluster over time
line_cluster_summary <- osm_lines_d_clst %>%
  st_drop_geometry() %>%
  group_by(year, line_class, cluster) %>%
  summarise(n_users = sum(n_users)) %>%
  left_join(yr_route_sum, by = c("year", "line_class")) %>%
  mutate(perc_users = round(n_users/total_yr_users*100, 1)) %>%
  mutate(cluster = as.factor(cluster))
`summarise()` has grouped output by 'year', 'line_class'. You can override
using the `.groups` argument.
line_clstr_summary_plt <- line_cluster_summary %>%
  mutate(period = case_when(year == 2019 ~ "pre",
                          year > 2019 &  year < 2023 ~ "peri",
                          year == 2023 ~ "post")) %>%
  mutate(period = factor(period, levels = c("pre", "peri", "post"))) %>%
  mutate(period = str_to_title(period)) %>%
  mutate(line_class = paste0(str_to_title(line_class), "s")) %>%
  filter(!is.na(cluster)) %>%
  rename(Cluster = cluster) %>%
  ggplot() + 
  geom_bar(aes(x = period, y = n_users, fill = Cluster), stat = "identity", 
           position = "stack") + 
    facet_wrap(~line_class) +
    theme_bw() + 
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(),
          strip.background = element_blank(), 
          strip.text = element_text(face = "bold"),
          axis.line = element_line(colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA, size = 1)) +
      labs(
         x = "Time Period",
         y = "Number of Users",
         color = "Cluster")

line_clstr_summary_plt + 
    scale_fill_brewer(palette = "Paired")

12 Examine movement of individual users

Get the number of observations each user has

#N obs per user
user_obs <- df_sf %>%
  st_drop_geometry() %>%
  group_by(user_id_yr) %>%
  summarise(n_obs = n()) 

df_sf <- df_sf %>%
  left_join(user_obs, by = "user_id_yr")

Check out the travel route of some sample users

Using user_id_yr which is an ID that accounts for users that visited multiple years

CLOSE_PTS_DIST_THRESH <- 100

#Grab a test user id 
set.seed(9); test_user_id <- df_sf %>%
  filter(n_obs > 5 & n_obs < 10) %>%
  pull(user_id_yr) %>%
  sample(1)

test_user_pts <- df_sf %>%
  filter(user_id_yr == test_user_id) %>%
  #Assign points an order id based on the date (oldest - newest)
  arrange(date) %>%
  mutate(point_id = paste0("p", row_number()))

#Calculate the pairwise distances between all points for user
distances <- test_user_pts %>%
  st_distance(test_user_pts) %>%
  drop_units() %>%
  as.data.frame() %>%
  mutate(point_id = paste0("p", seq(1, nrow(.)))) %>%
  setNames(c(paste0("p", seq(1, nrow(.))), "point_id")) %>%
  pivot_longer(-point_id, names_to = "point_id2", values_to = "dist") %>%
  filter(point_id != point_id2) %>%
  mutate(close_pt = ifelse(dist < CLOSE_PTS_DIST_THRESH, TRUE, FALSE)) %>%
  pivot_wider(names_from = point_id2, values_from = close_pt) %>%
  #Replace all rows with NA with FALSE
  mutate_all(~replace(., is.na(.), FALSE))


#Convert points to linestring which follow the order based on date
test_user_route <- test_user_pts %>%
  group_by(user_id_yr) %>%
  summarise() %>%
  st_cast("LINESTRING")

ggplot() + 
  geom_sf(data = test_user_route, size = 0.5, color = "grey") +
  geom_sf_label(data = test_user_pts, aes(label = user_id_yr), 
                nudge_x = 2, nudge_y = 2, size = 2.5) +
  theme_bw() + 
  # Remove all gridlines
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        # Add black outline to entire plot
        panel.border = element_rect(colour = "black", fill = NA, size = 1))